In [110]:
%matplotlib inline

Import custom scripts

In [111]:
path_scripts = '/mnt/kauffman/joosts/projects/STRT_epidermis/scripts'
In [112]:
import sys
sys.path.append(path_scripts)
In [113]:
from EPI_misc_scripts_v1_1 import *
from EPI_affinity_propagation_v1_0 import *
from EPI_neg_binom_regression_v1_1 import *
from EPI_pseudotemporal_ordering_v1_0 import *
from EPI_gene_neighbor_network_v1_0 import *
In [114]:
import matplotlib as mpl
from sklearn.ensemble import RandomForestClassifier

Initialize engines

In [115]:
from ipyparallel import Client
In [116]:
c = Client(profile='default')
In [117]:
dview = c[:]
In [118]:
dview.execute('import sys')
dview.execute('sys.path.append("/mnt/kauffman/joosts/projects/STRT_epidermis/scripts")')
dview.execute('from EPI_misc_scripts_v1_1 import *')
dview.execute('from EPI_affinity_propagation_v1_0 import *')
dview.execute('from EPI_neg_binom_regression_v1_1 import *')
dview.execute('from EPI_pseudotemporal_ordering_v1_0 import *')
dview.execute('from EPI_gene_neighbor_network_v1_0 import *')
Out[118]:
<AsyncResult: finished>

Import Data

In [119]:
exp_id = '201509151726'
path_input = '/mnt/kauffman/joosts/projects/STRT_epidermis/data_input/v1.8'
path_output = '/mnt/kauffman/joosts/projects/STRT_epidermis/data_output/v1.8'
path_figures = '/mnt/kauffman/joosts/projects/STRT_epidermis/figures/v1.8'
In [120]:
seq = loadData_v1(path_input, exp_id, 'seq', 'DataFrame')
meta = loadData_v1(path_input, exp_id, 'meta', 'DataFrame')
In [121]:
seq_ = log2Transform(dropNull(seq, path_input, cutoff_mean = 0.00))
Dropping spikes from dataset

Dropping repeats from dataset

Dropping unexpressed genes from dataset

Calculating binary logarithm of x + 1
In [122]:
seq_sel_IFE = loadData_v1(path_output, exp_id, 'seq_sel_IFE', 'DataFrame')
In [123]:
s_groups_1st = loadData_v1(path_output, exp_id, 's_groups_1st', 'Series')
g_groups_1st = loadData_v1(path_output, exp_id, 'g_groups_1st', 'Series')
In [124]:
s_groups_2nd = loadData_v1(path_output, exp_id, 's_groups_2nd', 'Series')
In [125]:
tsne_1st = loadData_v1(path_output, exp_id, 'tsne_1st', 'DataFrame')
In [126]:
tsne_IFE = loadData_v1(path_output, exp_id, 'tsne_IFE', 'DataFrame')
In [127]:
PTO_coords_IFE = loadData_v1(path_output, exp_id, 'PTO_coords_IFE', 'Series')
In [128]:
PTO_coords_IFE_multi = loadData_v1(path_output, exp_id, 'PTO_coords_IFE_multi', 'Series')
PTO_coords_IFE_tSNE_it = loadData_v1(path_output, exp_id, 'PTO_coords_IFE_tSNE_it', 'DataFrame')
PTO_coords_IFE_tSNE_res = loadData_v1(path_output, exp_id, 'PTO_coords_IFE_tSNE_res', 'DataFrame')
PTO_coords_IFE_tSNE_shf = loadData_v1(path_output, exp_id, 'PTO_coords_IFE_tSNE_shf', 'DataFrame')
In [129]:
NBR_1st_summary = loadData_from_pickle_v1(path_output, exp_id, 'NBR_1st_summary')
In [130]:
IFE_fitted = loadData_v1(path_output, exp_id, 'IFE_fitted', 'DataFrame')
IFE_stats = loadData_v1(path_output, exp_id, 'IFE_stats', 'DataFrame')
In [131]:
g_groups_IFE = loadData_v1(path_output, exp_id, 'g_groups_IFE', 'Series')
In [132]:
IFE_corr_max = loadData_v1(path_output, exp_id, 'IFE_corr_max', 'Series')
In [133]:
NBR_2nd_summary = loadData_from_pickle_v1(path_output, exp_id,'NBR_2nd_summary')
NBR_2nd_bin_bl = loadData_from_pickle_v1(path_output,exp_id,'NBR_2nd_bin_bl')
NBR_2nd_size_bl = loadData_from_pickle_v1(path_output,exp_id,'NBR_2nd_size_bl')
NBR_2nd_bin_gr = loadData_from_pickle_v1(path_output,exp_id,'NBR_2nd_bin_gr')
NBR_2nd_size_gr = loadData_from_pickle_v1(path_output,exp_id,'NBR_2nd_size_gr')

Colormaps

In [134]:
cmap_1st = {2:'#00CC00',
          1:'#FFE000',
          8:'#FF9900',
          0:'#FF3300',
          10:'#CC0000',
          6:'#cab2d6',
          3:'#A68BC2',
          4:'#65429A',
          5:'#000066',
          7:'#0066FF',
          9:'#33CCCC',
          11:'#FF99CC',
          12:'#660033'}
In [135]:
cmap_g_IFE = {1:'#1a9850',
              5:'#a6d96a',
              0:'#ffffbf',
              2:'#fee08b',
              3:'#fdae61',
              7:'#f46d43',
              4:'#d73027',
              6:'#a50026'}
In [136]:
nmap_g_IFE = {1:'I',
              5:'II',
              0:'III',
              2:'IV',
              3:'V',
              7:'VI',
              4:'VII',
              6:'VIII'}
In [137]:
C = open('%s/cmap_Gn_Yl_Rd.txt' % path_input,'r').read().split()

cmap_Gn_Yl_Rd = mpl.colors.ListedColormap(C)
In [138]:
cmap_2nd = {0: '#33a02c',
            1: '#b2df8a',
            2: '#00FF00',
            3:'#FFE000',
            4:'#FF9900',
            5:'#FF3300',
            6:'#CC0000',
            7:'#CC0066',
            8:'#FF99CC',
            9:'#FFCCCC',
            10:'#D2C5E1',
            11:'#A68BC2',
            12:'#6a3d9a',
            13:'#2A183E',
            14:'#000066',
            15:'#0000FF',
            16:'#33CCFF',
            17:'#99CCFF',
            18:'#666699',
            19:'#000066',
            20:'#33CCCC',
            21:'#00FFFF',
            22:'#006666',
            23:'#FF99CC',
            24:'#660033'}
In [139]:
nmap_2nd_short = {0:'IFE B I',
                  1:'IFE B II',
                  2:'INFU B',
                  3:'IFE D I',
                  4:'IFE D II',
                  5:'IFE K I',
                  6:'IFE K II',
                  7:'uHF I',
                  8:'uHF II',
                  9:'uHF III',
                  10:'uHF IV',
                  11:'uHF V',
                  12:'uHF VI',
                  13:'uHF VII',
                  14:'SG',
                  15:'OB I',
                  16:'OB II',
                  17:'OB III',
                  18:'OB IV',
                  19:'OB V',
                  20:'IB I',
                  21:'IB II',
                  22:'IB III',
                  23:'TC',
                  24:'LH'}
In [140]:
markers_2nd = {0: 'o',
            1: 'o',
            2: 'o',
            3:'s',
            4:'s',
            5:'s',
            6:'s',
            7:'^',
            8:'^',
            9:'^',
            10:'^',
            11:'^',
            12:'^',
            13:'^',
            14:'s',
            15:'D',
            16:'D',
            17:'D',
            18:'D',
            19:'D',
            20:'H',
            21:'H',
            22:'H',
            23:'s',
            24:'s'}
In [141]:
markers_2nd_size = {0:750,
            1:750,
            2:750,
            3:750,
            4:750,
            5:750,
            6:750,
            7:750,
            8:750,
            9:750,
            10:750,
            11:750,
            12:750,
            13:750,
            14:750,
            15:500,
            16:500,
            17:500,
            18:500,
            19:500,
            20:750,
            21:750,
            22:750,
            23:750,
            24:750}

t-SNE

In [142]:
#calculate dist mat with TSNE data

dist_mat_tsne = pairwise_distance_2d(tsne_IFE)
In [143]:
#generate MST and diameter path based on correlation-defined edge weight

MST, MST_pos = PTO_create_MST_2d(dist_mat_tsne)
diam_edges = PTO_diameter_path(MST, return_edges = True)
Diameter path between 1772067075-C08 and 1772067075-A05 with lenght 132
In [144]:
#bring TSNE positions in networkx compatible format

MST_pos_tsne = {}

for ix in tsne_IFE.index:
    
    MST_pos_tsne[ix] = (tsne_IFE.ix[ix,'x'], tsne_IFE.ix[ix,'y'])
In [155]:
#initialize figure

height = 12
width = 12

fig = plt.figure(facecolor = 'w', figsize = (width, height))

#define x- and y-limits

x_min, x_max = np.min(tsne_IFE['x']), np.max(tsne_IFE['x'])
y_min, y_max = np.min(tsne_IFE['y']), np.max(tsne_IFE['y'])
x_diff, y_diff = x_max - x_min, y_max - y_min

pad = 1.0

if x_diff > y_diff:
    xlim = (x_min - pad, x_max + pad)
    ylim = (y_min * (x_diff / y_diff) - pad, y_max * (x_diff / y_diff) + pad)

if x_diff < y_diff:
    xlim = (x_min * (y_diff/x_diff) - pad, x_max * (y_diff/x_diff) + pad)
    ylim = (y_min - pad, y_max + pad)

text_pad = 10

#

#ylim = (ylim[0] - 2 * pad, ylim[1] - 2 * pad)
    
#draw groups

ax1 = plt.subplot()

ax1.set_xlim(xlim)
ax1.set_ylim(ylim)

remove_ticks(ax1)

clist_tsne = [cmap_1st[s_groups_1st[ix]] for ix in MST.nodes()]

nx.draw_networkx_edges(MST, pos = MST_pos_tsne, ax = ax1, with_labels = False, node_size = 125, linewidths = 0.0, 
                width = 1.0, edge_color = 'black', node_color = clist_tsne, vmin = 0, vmax = 1.0,
                edgecolor=clist_tsne)

nx.draw_networkx_edges(MST, pos = MST_pos_tsne, ax = ax1, edgelist = diam_edges.edges(), width = 7.5)

ax1.scatter([MST_pos_tsne[ix][0] for ix in MST.nodes()],
            [MST_pos_tsne[ix][1] for ix in MST.nodes()],
            s = 125,
            color = clist_tsne,
            linewidth = 0.0,
            edgecolor = clist_tsne)

clean_axis(ax1)

figname = 'v1.8_3_A_IFE_t-SNE.pdf'
plt.savefig('%s/%s' % (path_figures, figname), 
            format = 'pdf', 
            transparent = True, 
            bbox_inches = 'tight', 
            pad_inches = 0, 
            rasterized = True)
In [36]:
#initialize figure

height = 5
width = 5

fig = plt.figure(facecolor = 'w', figsize = (width, height))

#draw inset

ax_inset = plt.subplot()

remove_ticks(ax_inset)

ix_sel = dist_mat_tsne.index

clist_tsne_inset = [cmap_1st[s_groups_1st[ix]] for ix in ix_sel]

ax_inset.scatter(tsne_1st['x'],
            tsne_1st['y'], 
            s = 20,
            linewidth = 0.0,
            c = 'silver',
            cmap = plt.cm.jet,
            vmin = 0, vmax = 1.0)

ax_inset.scatter(tsne_1st.ix[ix_sel, 'x'],
            tsne_1st.ix[ix_sel, 'y'], 
            s = 20,
            linewidth = 0.0,
            c = clist_tsne_inset,
            cmap = plt.cm.jet,
            vmin = 0, vmax = 1.0,
            edgecolor = clist_tsne_inset)

for s in ['top','bottom','left','right']:
    ax_inset.spines[s].set_linewidth(2)

figname = 'v1.8_S3_A_t-SNE_IFE_Inset.pdf'
plt.savefig('%s/%s' % (path_figures, figname), 
            format = 'pdf', 
            transparent = True, 
            bbox_inches = 'tight', 
            pad_inches = 0, 
            rasterized = True)

Validation of PTO with markers

In [37]:
dataset = log2Transform(seq)
PTO_coords = PTO_coords_IFE
cell_groups = s_groups_1st
tsne = tsne_IFE
fitted = log2Transform(IFE_fitted)
cmap = cmap_1st

cmap_tsne = plt.cm.seismic

#fontsizes

size_ticklabs = 30
size_ylab = 35
size_xlab = 35
size_leglab = 35

#initialize figure

height = 16
width = 36

fig = plt.figure(facecolor = 'w', figsize = (width, height))

#initialize GridSpec

gs1 = plt.GridSpec(4, 4, hspace=0.1, wspace = 0.25, height_ratios=[6,0.5,0.2,6])

#define xlim

xlim = (np.min(PTO_coords.values), np.max(PTO_coords.values))

#define x- and y-limits for tsne plots

x_min_tsne, x_max_tsne = np.min(tsne['x']), np.max(tsne['x'])
y_min_tsne, y_max_tsne= np.min(tsne['y']), np.max(tsne['y'])
x_diff_tsne, y_diff_tsne = x_max_tsne - x_min_tsne, y_max_tsne - y_min_tsne

pad = 2.0

if x_diff_tsne > y_diff_tsne:
    xlim_tsne = (x_min_tsne - pad, x_max_tsne + pad)
    ylim_tsne = (y_min_tsne * (x_diff_tsne / y_diff_tsne) - pad, y_max_tsne * (x_diff_tsne / y_diff_tsne) + pad)

if x_diff_tsne < y_diff_tsne:
    xlim_tsne = (x_min_tsne * (y_diff_tsne/x_diff_tsne) - pad, x_max * (y_diff_tsne/x_diff_tsne) + pad)
    ylim_tsne = (y_min_tsne - pad, y_max_tsne + pad)

text_pad = 2

#plot Krt14

g = 'Krt14'

ax1 = plt.subplot(gs1[0,0])

ax1.set_xlim(xlim)
ax1.set_xticks([])

ax1.set_ylim(0, 9)
for t in ax1.get_yticklabels():
    t.set_family('Liberation Sans')
    t.set_fontsize(size_ticklabs)
ax1.set_ylabel('Gene expression [$\log_2$]', family = 'Liberation Sans', fontsize = size_ylab)

clist_tmp = [cmap[cell_groups[ix]] for ix in PTO_coords.index]

ax1.scatter(PTO_coords,
            dataset.ix[g, PTO_coords.index],
            c = clist_tmp,
            s = 50,
            linewidth = 0.0,
            vmin = 0, vmax = 1.0,
            edgecolor = clist_tmp)

ax1.plot(np.arange(0, np.max(PTO_coords)),
         list(fitted.ix[g]),
         color = 'black',
         linewidth = 2)

ax1.text(ax1.get_xlim()[1] * 0.05, ax1.get_ylim()[1] * 0.925, g, fontsize = size_leglab, family = 'Liberation Sans', va = 'center', weight = 'bold')

#(b) Colorbar

ax1b = plt.subplot(gs1[1,0])

ax1b.matshow(np.matrix(np.arange(0, np.max(PTO_coords_IFE))), cmap = cmap_Gn_Yl_Rd, aspect = 'auto')

ax1b.set_xlabel('Pseudotime', family = 'Liberation Sans', fontsize = size_xlab, labelpad = 15)

clean_axis(ax1b)

#(c) TSNE

ax1c = plt.subplot(gs1[3,0])

ax1c.set_xlim(xlim_tsne)
ax1c.set_ylim(ylim_tsne)

remove_ticks(ax1c)

max_tmp = dataset.ix[g, tsne.index].max()

ax1c.scatter(tsne['x'],
             tsne['y'],
             c = dataset.ix[g, tsne.index],
             cmap = cmap_tsne,
             s = 50, linewidth = 0.0,
             edgecolor = [cmap_tsne(val / max_tmp) for val in dataset.ix[g, tsne.index]])

clean_axis(ax1c)

#plot Mt4

g = 'Mt4'

ax1 = plt.subplot(gs1[0,1])

ax1.set_xlim(xlim)
ax1.set_xticks([])

ax1.set_ylim(0, 7)
for t in ax1.get_yticklabels():
    t.set_family('Liberation Sans')
    t.set_fontsize(size_ticklabs)
ax1.set_ylabel('Gene expression [$\log_2$]', family = 'Liberation Sans', fontsize = size_ylab)

clist_tmp = [cmap[cell_groups[ix]] for ix in PTO_coords.index]

ax1.scatter(PTO_coords,
            dataset.ix[g, PTO_coords.index],
            c = clist_tmp,
            s = 50,
            linewidth = 0.0,
            vmin = 0, vmax = 1.0,
            edgecolor = clist_tmp)

ax1.plot(np.arange(0, np.max(PTO_coords)),
         list(fitted.ix[g]),
         color = 'black',
         linewidth = 2)

ax1.text(ax1.get_xlim()[1] * 0.05, ax1.get_ylim()[1] * 0.925, g, fontsize = size_leglab, family = 'Liberation Sans', va = 'center', weight = 'bold')

#(b) Colorbar

ax1b = plt.subplot(gs1[1,1])

ax1b.matshow(np.matrix(np.arange(0, np.max(PTO_coords_IFE))), cmap = cmap_Gn_Yl_Rd, aspect = 'auto')

ax1b.set_xlabel('Pseudotime', family = 'Liberation Sans', fontsize = size_xlab, labelpad = 15)

clean_axis(ax1b)

#(c) TSNE

ax1c = plt.subplot(gs1[3,1])

ax1c.set_xlim(xlim_tsne)
ax1c.set_ylim(ylim_tsne)

remove_ticks(ax1c)

max_tmp = dataset.ix[g, tsne.index].max()

ax1c.scatter(tsne['x'],
             tsne['y'],
             c = dataset.ix[g, tsne.index],
             cmap = cmap_tsne,
             s = 50, linewidth = 0.0,
             edgecolor = [cmap_tsne(val / max_tmp) for val in dataset.ix[g, tsne.index]])

clean_axis(ax1c)

#plot Krt10

g = 'Krt10'

ax1 = plt.subplot(gs1[0,2])

ax1.set_xlim(xlim)
ax1.set_xticks([])

ax1.set_ylim(0, 10)
for t in ax1.get_yticklabels():
    t.set_family('Liberation Sans')
    t.set_fontsize(size_ticklabs)
ax1.set_ylabel('Gene expression [$\log_2$]', family = 'Liberation Sans', fontsize = size_ylab)

clist_tmp = [cmap[cell_groups[ix]] for ix in PTO_coords.index]

ax1.scatter(PTO_coords,
            dataset.ix[g, PTO_coords.index],
            c = clist_tmp,
            s = 50,
            linewidth = 0.0,
            vmin = 0, vmax = 1.0,
            edgecolor = clist_tmp)

ax1.plot(np.arange(0, np.max(PTO_coords)),
         list(fitted.ix[g]),
         color = 'black',
         linewidth = 2)

ax1.text(ax1.get_xlim()[1] * 0.05, ax1.get_ylim()[1] * 0.925, g, fontsize = size_leglab, family = 'Liberation Sans', va = 'center', weight = 'bold')

#(b) Colorbar

ax1b = plt.subplot(gs1[1,2])

ax1b.matshow(np.matrix(np.arange(0, np.max(PTO_coords_IFE))), cmap = cmap_Gn_Yl_Rd, aspect = 'auto')

ax1b.set_xlabel('Pseudotime', family = 'Liberation Sans', fontsize = size_xlab, labelpad = 15)

clean_axis(ax1b)

#(c) TSNE

ax1c = plt.subplot(gs1[3,2])

ax1c.set_xlim(xlim_tsne)
ax1c.set_ylim(ylim_tsne)

remove_ticks(ax1c)

max_tmp = dataset.ix[g, tsne.index].max()

ax1c.scatter(tsne['x'],
             tsne['y'],
             c = dataset.ix[g, tsne.index],
             cmap = cmap_tsne,
             s = 50, linewidth = 0.0,
             edgecolor = [cmap_tsne(val / max_tmp) for val in dataset.ix[g, tsne.index]])

clean_axis(ax1c)

#plot Lor

g = 'Lor'

ax1 = plt.subplot(gs1[0,3])

ax1.set_xlim(xlim)
ax1.set_xticks([])

ax1.set_ylim(0, 10)
for t in ax1.get_yticklabels():
    t.set_family('Liberation Sans')
    t.set_fontsize(size_ticklabs)
ax1.set_ylabel('Gene expression [$\log_2$]', family = 'Liberation Sans', fontsize = size_ylab)

clist_tmp = [cmap[cell_groups[ix]] for ix in PTO_coords.index]

ax1.scatter(PTO_coords,
            dataset.ix[g, PTO_coords.index],
            c = clist_tmp,
            s = 50,
            linewidth = 0.0,
            vmin = 0, vmax = 1.0,
            edgecolor = clist_tmp)

ax1.plot(np.arange(0, np.max(PTO_coords)),
         list(fitted.ix[g]),
         color = 'black',
         linewidth = 2)

ax1.text(ax1.get_xlim()[1] * 0.05, ax1.get_ylim()[1] * 0.925, g, fontsize = size_leglab, family = 'Liberation Sans', va = 'center', weight = 'bold')

#(b) Colorbar

ax1b = plt.subplot(gs1[1,3])

ax1b.matshow(np.matrix(np.arange(0, np.max(PTO_coords_IFE))), cmap = cmap_Gn_Yl_Rd, aspect = 'auto')

ax1b.set_xlabel('Pseudotime', family = 'Liberation Sans', fontsize = size_xlab, labelpad = 15)

clean_axis(ax1b)

#(c) TSNE

ax1c = plt.subplot(gs1[3,3])

ax1c.set_xlim(xlim_tsne)
ax1c.set_ylim(ylim_tsne)

remove_ticks(ax1c)

max_tmp = dataset.ix[g, tsne.index].max()

ax1c.scatter(tsne['x'],
             tsne['y'],
             c = dataset.ix[g, tsne.index],
             cmap = cmap_tsne,
             s = 50, linewidth = 0.0,
             edgecolor = [cmap_tsne(val / max_tmp) for val in dataset.ix[g, tsne.index]])

clean_axis(ax1c)

figname = 'v1.8_3_B_Diff_validation.pdf'
plt.savefig('%s/%s' % (path_figures, figname), 
            format = 'pdf', 
            transparent = True, 
            bbox_inches = 'tight', 
            pad_inches = 0, 
            rasterized = True)
Calculating binary logarithm of x + 1

Calculating binary logarithm of x + 1

Validation of t-SNE for PTO

In [38]:
PTO_coords_IFE_multi_corr = [PTO_coords_IFE.values,PTO_coords_IFE_multi[PTO_coords_IFE.index].values]
In [39]:
#invert pseudotime if necessary and normalize data

ch_1, ch_2 = [x for x in chunks(PTO_coords_IFE.index, len(PTO_coords_IFE.index) / 2)]

for ix in PTO_coords_IFE_tSNE_it.index:
    max_tmp = PTO_coords_IFE_tSNE_it.ix[ix].max()

    if PTO_coords_IFE_tSNE_it.ix[ix, ch_1].mean() > PTO_coords_IFE_tSNE_it.ix[ix, ch_2].mean():
            PTO_coords_IFE_tSNE_it.ix[ix] = (PTO_coords_IFE_tSNE_it.ix[ix] - max_tmp) * -1
            
PTO_coords_IFE_tSNE_it = PTO_coords_IFE_tSNE_it.apply(lambda x: x / np.max(x), axis = 1)
In [40]:
#define fused list of corr_analysis

X, Y = [], []

for ix in PTO_coords_IFE_tSNE_it.index:
    
    X += list(PTO_coords_IFE[PTO_coords_IFE_tSNE_it.ix[ix].index])
    Y += list(PTO_coords_IFE_tSNE_it.ix[ix])
    
PTO_coords_IFE_tSNE_it_corr = [X,Y]
In [41]:
#invert pseudotime if necessary and normalize data

for ix in PTO_coords_IFE_tSNE_res.index:
    ix_tmp = PTO_coords_IFE_tSNE_res.ix[ix, PTO_coords_IFE.index][~PTO_coords_IFE_tSNE_res.ix[ix, PTO_coords_IFE.index].isnull()].index
    
    ch_1, ch_2 = [x for x in chunks(PTO_coords_IFE[ix_tmp].index, len(PTO_coords_IFE[ix_tmp].index) / 2)]
    max_tmp = PTO_coords_IFE_tSNE_res.ix[ix, ix_tmp].max()

    if PTO_coords_IFE_tSNE_res.ix[ix, ch_1].mean() > PTO_coords_IFE_tSNE_res.ix[ix, ch_2].mean():
            PTO_coords_IFE_tSNE_res.ix[ix, ix_tmp] = (PTO_coords_IFE_tSNE_res.ix[ix, ix_tmp] - max_tmp) * -1
            
    PTO_coords_IFE_tSNE_res.ix[ix, ix_tmp] = PTO_coords_IFE_tSNE_res.ix[ix, ix_tmp] / max_tmp
In [42]:
#define fused list of corr_analysis

X, Y = [], []

for ix in PTO_coords_IFE_tSNE_res.index:
    
    ix_tmp = PTO_coords_IFE_tSNE_res.ix[ix, PTO_coords_IFE.index][~PTO_coords_IFE_tSNE_res.ix[ix, PTO_coords_IFE.index].isnull()].index

    X += list(PTO_coords_IFE[PTO_coords_IFE_tSNE_res.ix[ix, ix_tmp].index])
    Y += list(PTO_coords_IFE_tSNE_res.ix[ix, ix_tmp])
    
PTO_coords_IFE_tSNE_res_corr = [X,Y]
In [43]:
#invert pseudotime if necessary and normalize data

ch_1, ch_2 = [x for x in chunks(PTO_coords_IFE.index, len(PTO_coords_IFE.index) / 2)]

for ix in PTO_coords_IFE_tSNE_shf.index:
    max_tmp = PTO_coords_IFE_tSNE_shf.ix[ix].max()

    if PTO_coords_IFE_tSNE_shf.ix[ix, ch_1].mean() > PTO_coords_IFE_tSNE_shf.ix[ix, ch_2].mean():
            PTO_coords_IFE_tSNE_shf.ix[ix] = (PTO_coords_IFE_tSNE_shf.ix[ix] - max_tmp) * -1

    
PTO_coords_IFE_tSNE_shf = PTO_coords_IFE_tSNE_shf.apply(lambda x: x / np.max(x), axis = 1)
In [44]:
#define fused list of corr_analysis

X, Y = [], []

for ix in PTO_coords_IFE_tSNE_shf.index:
    
    X += list(PTO_coords_IFE[PTO_coords_IFE_tSNE_shf.ix[ix].index])
    Y += list(PTO_coords_IFE_tSNE_shf.ix[ix])
    
PTO_coords_IFE_tSNE_shf_corr = [X,Y]
In [180]:
#initialize figure

height = 8
width = 36

fig = plt.figure(facecolor = 'w', figsize = (width, height))

gs = plt.GridSpec(1, 4, wspace = 0.25)

###############################################################
##### tsne vs. correlation distance

#define data

x_data = PTO_coords_IFE[PTO_coords_IFE.index].values
y_data = PTO_coords_IFE_multi[PTO_coords_IFE.index].values
corr = PTO_coords_IFE_multi_corr

ax = plt.subplot(gs[0])

#define x-axis

ax.set_xlim(x_data.min(), x_data.max())

ax.set_xlabel('Pseudotime\n(Euclidean distance\nin 2D tSNE space)', family = 'Liberation Sans', fontsize = 40)
ax.xaxis.set_label_coords(x = 0.5, y = -0.025)
ax.xaxis.set_ticks([])

#define y-axis

ax.set_ylim(y_data.min(), y_data.max())

ax.set_ylabel('Pseudotime\n(Pearson correlation)', family = 'Liberation Sans', fontsize = 40)
ax.yaxis.set_label_coords(y = 0.5, x = -0.025)
ax.yaxis.set_ticks([])

#plot data

ax.scatter(x_data,
           y_data,
           c = 'dodgerblue', 
           s= 50, 
           edgecolor = 'dodgerblue')

#print corr

ax.xaxis.set_ticks_position('top')
ax.set_xticks([np.max(PTO_coords_IFE) * 0.5])
ax.set_xticklabels(['R = %.2f' % scipy.stats.pearsonr(corr[0], corr[1])[0]], 
                   family = 'Liberation Sans', fontsize = 35, color = 'red')

###############################################################

ax = plt.subplot(gs[1])

#define dataset

x_data = PTO_coords_IFE
y_data = PTO_coords_IFE_tSNE_it[PTO_coords_IFE.index]
corr = PTO_coords_IFE_tSNE_it_corr

#define x-axis

ax.set_xlim(PTO_coords_IFE.min(), PTO_coords_IFE.max())

ax.set_xlabel('Pseudotime\n(Euclidean distance\nin 2D tSNE space)', family = 'Liberation Sans', fontsize = 40)
ax.xaxis.set_label_coords(x = 0.5, y = -0.025)
ax.xaxis.set_ticks([])

#define y-axis

ax.set_ylim(0, 1)

ax.set_ylabel('Pseudotime\n(%s tSNE iterations)' % len(y_data.index), family = 'Liberation Sans', fontsize = 40)
ax.yaxis.set_label_coords(y = 0.5, x = -0.025)
ax.yaxis.set_ticks([])

#normalize dataset

y_data = y_data.apply(lambda x: x / np.max(x), axis = 1)

#plot median and percentiles

ax.plot(x_data,
       y_data.median(axis = 0),
        color = 'black', 
        linewidth = 5)

ax.fill_between(x_data, 
                y_data.quantile(q = 0.05, axis = 0), 
                y_data.quantile(q = 0.95, axis = 0), 
                color = 'dodgerblue', alpha = 1)

ax.xaxis.set_ticks_position('top')
ax.set_xticks([np.max(PTO_coords_IFE) * 0.5])
ax.set_xticklabels(['R = %.2f' % scipy.stats.pearsonr(corr[0], corr[1])[0]], 
                   family = 'Liberation Sans', fontsize = 35, color = 'red')

###############################################################

ax = plt.subplot(gs[2])

#define dataset

x_data = PTO_coords_IFE
y_data = PTO_coords_IFE_tSNE_res[PTO_coords_IFE.index]
corr = PTO_coords_IFE_tSNE_res_corr

#define x-axis

ax.set_xlim(PTO_coords_IFE.min(), PTO_coords_IFE.max())

ax.set_xlabel('Pseudotime\n(Euclidean distance\nin 2D tSNE space)', family = 'Liberation Sans', fontsize = 40)
ax.xaxis.set_label_coords(x = 0.5, y = -0.025)
ax.xaxis.set_ticks([])

#define y-axis

ax.set_ylim(0, 1)

ax.set_ylabel('Pseudotime\n(%s tSNE iterations after resampling)' % len(y_data.index), family = 'Liberation Sans', fontsize = 40)
ax.yaxis.set_label_coords(y = 0.5, x = -0.025)
ax.yaxis.set_ticks([])
    
#plot median and percentiles

ax.plot(x_data,
       y_data.median(axis = 0),
        color = 'black', 
        linewidth = 5)

ax.fill_between(x_data, 
                y_data.quantile(q = 0.05, axis = 0), 
                y_data.quantile(q = 0.95, axis = 0), 
                color = 'dodgerblue', alpha = 1)

ax.xaxis.set_ticks_position('top')
ax.set_xticks([np.max(PTO_coords_IFE) * 0.5])
ax.set_xticklabels(['R = %.2f' % scipy.stats.pearsonr(corr[0], corr[1])[0]], 
                   family = 'Liberation Sans', fontsize = 35, color = 'red')

###############################################################

ax = plt.subplot(gs[3])

#define dataset

x_data = PTO_coords_IFE
y_data = PTO_coords_IFE_tSNE_shf[PTO_coords_IFE.index]
corr = PTO_coords_IFE_tSNE_shf_corr

#define x-axis

ax.set_xlim(PTO_coords_IFE.min(), PTO_coords_IFE.max())

ax.set_xlabel('Pseudotime\n(Euclidean distance\nin 2D tSNE space)', family = 'Liberation Sans', fontsize = 40)
ax.xaxis.set_label_coords(x = 0.5, y = -0.025)
ax.xaxis.set_ticks([])

#define y-axis

ax.set_ylim(0, 1)

ax.set_ylabel('Pseudotime\n(%s tSNE iterations after shuffling)' % len(y_data.index), family = 'Liberation Sans', fontsize = 40)
ax.yaxis.set_label_coords(y = 0.5, x = -0.025)
ax.yaxis.set_ticks([])

#normalize dataset

y_data = y_data.apply(lambda x: x / np.max(x), axis = 1)
    
#plot median and percentiles

ax.plot(x_data,
       y_data.median(axis = 0),
        color = 'black', 
        linewidth = 5)

ax.fill_between(x_data, 
                y_data.quantile(q = 0.05, axis = 0), 
                y_data.quantile(q = 0.95, axis = 0), 
                color = 'dodgerblue', alpha = 1)

ax.xaxis.set_ticks_position('top')
ax.set_xticks([np.max(PTO_coords_IFE) * 0.5])
ax.set_xticklabels(['R = %.2f' % scipy.stats.pearsonr(corr[0], corr[1])[0]], 
                   family = 'Liberation Sans', fontsize = 35, color = 'red')

figname = 'v1.8_S4_A_Pseudotime_t-SNE_validation.pdf'
plt.savefig('%s/%s' % (path_figures, figname), 
            format = 'pdf', 
            transparent = True, 
            bbox_inches = 'tight', 
            pad_inches = 0, 
            rasterized = True)

Selection of pseudotime dependent genes

Select genes with p-value < Bonferroni(0.001)

In [218]:
bonferroni_IFE = 0.001 / len(IFE_stats.index)
In [219]:
genes_sel = IFE_stats['Pr(>Chisq)'][IFE_stats['Pr(>Chisq)'] < bonferroni_IFE].index
In [220]:
len(genes_sel)
Out[220]:
1627

Plot mean expression vs. p-value and selected genes

In [221]:
#create data

x_data = np.log2(seq.ix[IFE_stats.index, PTO_coords_IFE.index].mean(axis = 1))
y_data = -np.log10(IFE_stats.ix[IFE_stats.index, 'Pr(>Chisq)'])

#create color list

clist = pd.Series('silver', index = IFE_stats.index)
clist.ix[genes_sel] = 'green'

#initialize figure

height = 12
width = 12

fig = plt.figure(facecolor = 'w', figsize = (width, height))

ax = plt.subplot()

#define x-axis

ax.set_xlim(x_data.min(), x_data.max())

ax.set_xlabel('Average number of transcripts [$\log_2$]', family = 'Liberation Sans', fontsize = 40)
ax.xaxis.set_label_coords(x = 0.5, y = -0.075)
for tick in ax.xaxis.get_major_ticks():
                tick.label.set_fontsize(35) 
                tick.label.set_family('Liberation Sans')   

#define y-axis

ax.set_ylim(y_data.min(), y_data.max())

ax.set_ylabel('Likelihood ratio test for\npseudotime dependency\n- P-value [$-\log_{10}$]', family = 'Liberation Sans', fontsize = 40)
ax.yaxis.set_label_coords(y = 0.5, x = -0.075)
for tick in ax.yaxis.get_major_ticks():
                tick.label.set_fontsize(35) 
                tick.label.set_family('Liberation Sans')   

#plot data

ax.scatter(x_data,
           y_data,
           c = clist, 
           linewidth = 0, 
           s= 50, 
           edgecolor = clist)

ax.axhline(y = -np.log10(bonferroni_IFE), c = 'r', linewidth = 5)
"""
figname = 'v1.8_S4_B_P-values_pseudotime.pdf'
plt.savefig('%s/%s' % (path_figures, figname), 
            format = 'pdf', 
            transparent = True, 
            bbox_inches = 'tight', 
            pad_inches = 0, 
            rasterized = True)
"""
Out[221]:
"\nfigname = 'v1.8_S4_B_P-values_pseudotime.pdf'\nplt.savefig('%s/%s' % (path_figures, figname), \n            format = 'pdf', \n            transparent = True, \n            bbox_inches = 'tight', \n            pad_inches = 0, \n            rasterized = True)\n"
In [217]:
dataset = log2Transform(seq)
PTO_coords = PTO_coords_IFE
cell_groups = s_groups_1st
tsne = tsne_IFE
fitted = log2Transform(IFE_fitted)
cmap = cmap_1st

cmap_tsne = plt.cm.seismic

genes = ['Sbsn','mt-Rnr2','Sdr9c7','Zc3hav1']
y_max_genes = [10,12,5,5]

#fontsizes

size_ticklabs = 30
size_ylab = 35
size_xlab = 35
size_leglab = 35

#iterate over genes

for ix, g in enumerate(genes):
    
    figname = 'v1.7_S6_G_PTO_%s.pdf' % g

    #initialize figure

    height = 6.6
    width = 6.5

    fig = plt.figure(facecolor = 'w', figsize = (width, height))

    #initialize GridSpec

    gs1 = plt.GridSpec(2, 1, hspace=0.1, wspace = 0.25, height_ratios=[6, 0.5])

    #define xlim

    xlim = (np.min(PTO_coords.values), np.max(PTO_coords.values))

    #define x- and y-limits for tsne plots

    x_min_tsne, x_max_tsne = np.min(tsne['x']), np.max(tsne['x'])
    y_min_tsne, y_max_tsne= np.min(tsne['y']), np.max(tsne['y'])
    x_diff_tsne, y_diff_tsne = x_max_tsne - x_min_tsne, y_max_tsne - y_min_tsne

    pad = 2.0

    if x_diff_tsne > y_diff_tsne:
        xlim_tsne = (x_min_tsne - pad, x_max_tsne + pad)
        ylim_tsne = (y_min_tsne * (x_diff_tsne / y_diff_tsne) - pad, y_max_tsne * (x_diff_tsne / y_diff_tsne) + pad)

    if x_diff_tsne < y_diff_tsne:
        xlim_tsne = (x_min_tsne * (y_diff_tsne/x_diff_tsne) - pad, x_max * (y_diff_tsne/x_diff_tsne) + pad)
        ylim_tsne = (y_min_tsne - pad, y_max_tsne + pad)

    text_pad = 2

    ax1 = plt.subplot(gs1[0,0])

    ax1.set_xlim(xlim)
    ax1.set_xticks([])

    ax1.set_ylim(0, y_max_genes[ix])
    for t in ax1.get_yticklabels():
        t.set_family('Liberation Sans')
        t.set_fontsize(size_ticklabs)
    ax1.set_ylabel('Gene expression [$\log_2$]', family = 'Liberation Sans', fontsize = size_ylab)

    clist_tmp = [cmap[cell_groups[ix]] for ix in PTO_coords.index]

    ax1.scatter(PTO_coords,
                dataset.ix[g, PTO_coords.index],
                c = clist_tmp,
                s = 50,
                linewidth = 0.0,
                vmin = 0, vmax = 1.0,
                edgecolor = clist_tmp)

    ax1.plot(np.arange(0, np.max(PTO_coords)),
             list(fitted.ix[g]),
             color = 'black',
             linewidth = 2)

    ax1.text(ax1.get_xlim()[1] * 0.05, ax1.get_ylim()[1] * 0.925, g, fontsize = size_leglab, family = 'Liberation Sans', va = 'center', weight = 'bold')

    #(b) Colorbar

    ax1b = plt.subplot(gs1[1,0])

    ax1b.matshow(np.matrix(np.arange(0, np.max(PTO_coords_IFE))), cmap = cmap_Gn_Yl_Rd, aspect = 'auto')

    ax1b.set_xlabel('Pseudotime', family = 'Liberation Sans', fontsize = size_xlab, labelpad = 15)

    clean_axis(ax1b)
"""    
    plt.savefig('%s/%s' % (path_figures, figname), 
            format = 'pdf', 
            transparent = True, 
            bbox_inches = 'tight', 
            pad_inches = 0, 
            rasterized = True)
"""
Calculating binary logarithm of x + 1

Calculating binary logarithm of x + 1
In [104]:
dataset = log2Transform(seq)
PTO_coords = PTO_coords_IFE
cell_groups = s_groups_1st
tsne = tsne_IFE
fitted = log2Transform(IFE_fitted)
cmap = cmap_1st

cmap_tsne = plt.cm.seismic

genes = ['Ivl']
y_max_genes = [10]

#fontsizes

size_ticklabs = 30
size_ylab = 35
size_xlab = 35
size_leglab = 35

#iterate over genes

for ix, g in enumerate(genes):
    
    figname = 'v1.7_S6_G_PTO_%s.pdf' % g

    #initialize figure

    height = 6.6
    width = 6.5

    fig = plt.figure(facecolor = 'w', figsize = (width, height))

    #initialize GridSpec

    gs1 = plt.GridSpec(2, 1, hspace=0.1, wspace = 0.25, height_ratios=[6, 0.5])

    #define xlim

    xlim = (np.min(PTO_coords.values), np.max(PTO_coords.values))

    #define x- and y-limits for tsne plots

    x_min_tsne, x_max_tsne = np.min(tsne['x']), np.max(tsne['x'])
    y_min_tsne, y_max_tsne= np.min(tsne['y']), np.max(tsne['y'])
    x_diff_tsne, y_diff_tsne = x_max_tsne - x_min_tsne, y_max_tsne - y_min_tsne

    pad = 2.0

    if x_diff_tsne > y_diff_tsne:
        xlim_tsne = (x_min_tsne - pad, x_max_tsne + pad)
        ylim_tsne = (y_min_tsne * (x_diff_tsne / y_diff_tsne) - pad, y_max_tsne * (x_diff_tsne / y_diff_tsne) + pad)

    if x_diff_tsne < y_diff_tsne:
        xlim_tsne = (x_min_tsne * (y_diff_tsne/x_diff_tsne) - pad, x_max * (y_diff_tsne/x_diff_tsne) + pad)
        ylim_tsne = (y_min_tsne - pad, y_max_tsne + pad)

    text_pad = 2

    ax1 = plt.subplot(gs1[0,0])

    ax1.set_xlim(xlim)
    ax1.set_xticks([])

    ax1.set_ylim(0, y_max_genes[ix])
    for t in ax1.get_yticklabels():
        t.set_family('Liberation Sans')
        t.set_fontsize(size_ticklabs)
    ax1.set_ylabel('Gene expression [$\log_2$]', family = 'Liberation Sans', fontsize = size_ylab)

    clist_tmp = [cmap[cell_groups[ix]] for ix in PTO_coords.index]

    ax1.scatter(PTO_coords,
                dataset.ix[g, PTO_coords.index],
                c = clist_tmp,
                s = 50,
                linewidth = 0.0,
                vmin = 0, vmax = 1.0,
                edgecolor = clist_tmp)

    ax1.plot(np.arange(0, np.max(PTO_coords)),
             list(fitted.ix[g]),
             color = 'black',
             linewidth = 2)

    ax1.text(ax1.get_xlim()[1] * 0.05, ax1.get_ylim()[1] * 0.925, g, fontsize = size_leglab, family = 'Liberation Sans', va = 'center', weight = 'bold')

    #(b) Colorbar

    ax1b = plt.subplot(gs1[1,0])

    ax1b.matshow(np.matrix(np.arange(0, np.max(PTO_coords_IFE))), cmap = cmap_Gn_Yl_Rd, aspect = 'auto')

    ax1b.set_xlabel('Pseudotime', family = 'Liberation Sans', fontsize = size_xlab, labelpad = 15)

    clean_axis(ax1b)
"""    
    plt.savefig('%s/%s' % (path_figures, figname), 
            format = 'pdf', 
            transparent = True, 
            bbox_inches = 'tight', 
            pad_inches = 0, 
            rasterized = True)
"""
Calculating binary logarithm of x + 1

Calculating binary logarithm of x + 1
Out[104]:
"    \n    plt.savefig('%s/%s' % (path_figures, figname), \n            format = 'pdf', \n            transparent = True, \n            bbox_inches = 'tight', \n            pad_inches = 0, \n            rasterized = True)\n"
In [ ]:
 

Rolling wave plot

In [73]:
#normalize

IFE_splines_norm = IFE_fitted.ix[g_groups_IFE.index].apply(lambda x: x / np.max(x), axis = 1)
In [74]:
IFE_InPeDe = PTO_define_peak(IFE_splines_norm, cutoff = 0.75)
In [75]:
g_groups_IFE = PTO_order_groups_InPe(g_groups_IFE, IFE_InPeDe)
In [76]:
len(g_groups_IFE)
Out[76]:
1627
In [77]:
dataset = IFE_splines_norm.ix[g_groups_IFE.index]
cell_groups = s_groups_1st
gene_groups = g_groups_IFE
PTO_coords = PTO_coords_IFE
cmap = cmap_1st
cmap_g = cmap_g_IFE
nmap_g = nmap_g_IFE

#initialize figure

height = 20
width = 20

fig = plt.figure(facecolor = 'w', figsize = (width, height))

gs1 = plt.GridSpec(3, 2, hspace=0.025, wspace=0.025, height_ratios=[1,18,1], width_ratios = [19, 1])

#draw heatmap in pseudotime

ax0 = plt.subplot(gs1[1,0])
ax0.set_ylim(len(dataset.index), 0)

ax0.matshow(dataset, aspect = 'auto', cmap = plt.cm.RdBu_r, vmin = 0, vmax = 1.0)

remove_ticks(ax0)
clean_axis(ax0)

ix_tmp = list(dataset.index)

ax0.set_yticks([0, 500, 1000, len(dataset.index), ix_tmp.index('Krt14'), ix_tmp.index('Mt4'), ix_tmp.index('Krt10'), ix_tmp.index('Lor')])
ax0.set_yticklabels([0, 500, 1000, len(dataset.index), 'Krt14 - ', 'Mt4 - ', 'Krt10 - ', 'Lor - '], 
                    family = 'Liberation Sans', fontsize = 35)

#s_group colors

ax1 = plt.subplot(gs1[0,0])

ax1.set_xlim(0, np.max([float(val) for val in dataset.columns]))

for ix in PTO_coords.index:
    
    ax1.vlines(x = PTO_coords[ix], ymin = 0, ymax = 1, 
               color = cmap[cell_groups[ix]], linewidth = 1.0)
    
#remove_ticks(ax1)
clean_axis(ax1)
    
#PTO colors

ax2 = plt.subplot(gs1[2,0])

ax2.matshow(np.matrix(range(0, int(np.max([float(val) for val in dataset.columns])))), 
            cmap = cmap_Gn_Yl_Rd,
            aspect = 'auto')
    
ax2.set_xlabel('Differentiation axis', family = 'Liberation Sans', fontsize = 40, labelpad = 20)

ax2.text(0, 0.75, 'basal', family = 'Liberation Sans', fontsize = 35, va = 'top', ha = 'center')

ax2.text(np.max([float(val) for val in dataset.columns]), 0.75, 'terminally\ndifferentiated', 
         family = 'Liberation Sans', fontsize = 35,  va = 'top', ha = 'center')

clean_axis(ax2)

#genes group identity

ax3 = plt.subplot(gs1[1,1])

ax3.set_ylim(len(dataset.index),0)

for pos, gr in enumerate(gene_groups.values):
    
    ax3.axhspan(pos, pos + 1, color = cmap_g[gr])
                
#gene group names

y_ticks = []

for gr in return_unique(gene_groups):
    
    start_tmp = list(gene_groups.values).index(gr)
    len_tmp = len(gene_groups[gene_groups==gr])
    y_ticks.append(start_tmp + (len_tmp * 0.5))
    
clean_axis(ax3)
    
ax3.set_yticks(y_ticks)
ax3.yaxis.set_ticks_position('right')
ax3.set_yticklabels([nmap_g[gr] for gr in return_unique(gene_groups)], family = 'Liberation Sans', fontsize = 35)
ax3.tick_params(axis='y', which='major', pad=15)

#draw lines

for gr in set(gene_groups):
    
    pos = list(gene_groups).index(gr)
    
    ax0.axhline(pos, color = 'white', linewidth = 5)
    ax3.axhline(pos, color = 'white', linewidth = 5)

figname = 'v1.8_3_C_Diff_rolling_wave.pdf'
plt.savefig('%s/%s' % (path_figures, figname), 
            format = 'pdf', 
            transparent = True, 
            bbox_inches = 'tight', 
            pad_inches = 0, 
            rasterized = True)
In [91]:
#initialize figure

height = 10
width = 1

fig = plt.figure(facecolor = 'w', figsize = (width, height))

#draw

axLabel = plt.subplot()

axLabel.set_ylim(0,1)

for pos in np.arange(1.0, 0.0, -0.001):
    axLabel.axhspan(pos, pos + 0.001, color = plt.cm.RdBu_r(pos))


axLabel.xaxis.set_ticks([])
axLabel.yaxis.set_ticks_position('right')

clean_axis(axLabel)

axLabel.set_yticks([1.0,0.0])
axLabel.set_yticklabels(['max','0'], family = 'Liberation Sans', fontsize = 30, va = 'center')
axLabel.tick_params(axis='y', which='major', pad=10)

axLabel.yaxis.set_label_coords(2, 0.5)
axLabel.set_ylabel('Gene expression', family = 'Liberation Sans', fontsize = 40, va = 'center', ha = 'center')


figname = 'v1.8_3_C_Legend_expression.pdf'
plt.savefig('%s/%s' % (path_figures, figname), 
            format = 'pdf', 
            transparent = True, 
            bbox_inches = 'tight', 
            pad_inches = 0, 
            rasterized = True)

Transcription factors

In [92]:
#select TFs among pseudotime dependent genes

TF_mm9 = open('%s/TF_mm9.txt' % path_input,'r').read().split()

TF_IFE = set(g_groups_IFE.index & TF_mm9)

#remove immediate early genes

IEG = ['Atf3','Atf4','Jun','Fos','Fosl1','Jund']

TF_IFE = list(set(TF_IFE) - set(IEG))
In [93]:
len(TF_IFE)
Out[93]:
65
In [94]:
#select most pseudotime dependent TFS

TF_IFE_sel = IFE_stats.ix[TF_IFE, 'Pr(>Chisq)'].order()[:30].index
In [95]:
TF_IFE_sel_order = pd.Series(index = TF_IFE_sel)

for g in TF_IFE_sel:
    TF_IFE_sel_order[g] = list(g_groups_IFE.index).index(g)
In [100]:
#data

TF_sel = TF_IFE_sel_order
TF_bold = ['Hoxa7', 'Hes1', 'Grhl1', 'Prdm1','Gata3']
splines = IFE_splines_norm
p_val = -np.log10(IFE_stats['Pr(>Chisq)'])
cutoff = -np.log10(bonferroni_IFE)
gene_groups = g_groups_IFE
cmap_gene_groups = cmap_g_IFE
cmap_splines = plt.cm.RdYlBu_r
cmap_colorbar = cmap_Gn_Yl_Rd

#initialize figure

height = 20
width = 20

fig = plt.figure(facecolor = 'w', figsize = (width, height))

#initialize GridSpec

gs = plt.GridSpec(2, 4, hspace=0.025, wspace=0.05, height_ratios=[19.25,0.75], width_ratios=[3,1,11,5])

#plot TF names

ax = plt.subplot(gs[0,0])

ax.set_xlim(0,1)
ax.set_ylim(len(TF_sel.index) - 0.5, -0.5)

for pos, g in enumerate(TF_sel.order().index):
    
    if g in TF_bold:
        ax.text(1.0, pos, g, family = 'Liberation Sans', fontsize = 35, 
                ha = 'right', va = 'center', fontweight = 'bold')
        
    else:
        ax.text(1.0, pos, g, family = 'Liberation Sans', fontsize = 35, 
                ha = 'right', va = 'center')
        
clean_axis(ax)

#plot TF groups

ax = plt.subplot(gs[0,1])

ax.set_xlim(0,1)
ax.set_ylim(len(TF_sel.index), 0.0)

lw = 1

for pos, g in enumerate(TF_sel.order().index):
    ax.axhspan(pos, pos + 1, color = cmap_gene_groups[gene_groups[g]])
    ax.axhline(pos, color = 'black', linewidth = lw)
    
ax.axhline(pos + 0.99, color = 'black', linewidth = lw)    
ax.axvline(0, color = 'black', linewidth = lw)
ax.axvline(0.99, color = 'black', linewidth = lw)

clean_axis(ax)

#plot spline heatmap

ax = plt.subplot(gs[0,2])

ax.matshow(splines.ix[TF_sel.order().index], aspect = 'auto', cmap = cmap_splines, vmin = 0, vmax = 1.0)

for pos, g in enumerate(TF_sel.order().index):
    ax.axhline(pos + 0.5, color = 'white', linewidth = lw)

clean_axis(ax)

#plot spline colorbar

ax = plt.subplot(gs[1,2])

x_max = np.max([float(x) for x in splines.columns])

ax.set_xlim(0, x_max)

for pos in np.arange(0, x_max):
    ax.axvspan(pos, pos + 1, color = cmap_colorbar(pos / x_max))
    
ax.set_xlabel('Differentiation axis', family = 'Liberation Sans', fontsize = 40, labelpad = 20)

clean_axis(ax)

#plot p-value

ax = plt.subplot(gs[0,3])

ax.spines['right'].set_color('none')

ax.set_xlim(0, p_val.ix[TF_sel.order().index].max())
ax.set_ylim(len(TF_sel.index) - 0.5, -0.5)

ax.barh(bottom = [x - 0.4 for x in range(len(TF_sel.index))], 
        width = p_val.ix[TF_sel.order().index], 
        height = 0.8, color = 'darkgrey', linewidth = 0)

ax.axvline(cutoff, color = 'red', linewidth = 3)

ax.set_xlabel('P-value [$-\log_{10}$]', 
              family = 'Liberation Sans', fontsize = 35)
#ax.xaxis.set_label_coords(0.5, -0.075)

for tick in ax.xaxis.get_major_ticks():
                tick.label.set_fontsize(35) 
                tick.label.set_family('Liberation Sans')
            
ax.set_yticks([])

figname = 'v1.8_3_D_Diff_TFs.pdf'
plt.savefig('%s/%s' % (path_figures, figname), 
            format = 'pdf', 
            transparent = True, 
            bbox_inches = 'tight', 
            pad_inches = 0, 
            rasterized = True)
In [101]:
#initialize figure

height = 10
width = 1

fig = plt.figure(facecolor = 'w', figsize = (width, height))

#draw

axLabel = plt.subplot()

axLabel.set_ylim(0,1)

for pos in np.arange(1.0, 0.0, -0.001):
    axLabel.axhspan(pos, pos + 0.001, color = plt.cm.RdYlBu_r(pos))


axLabel.xaxis.set_ticks([])
axLabel.yaxis.set_ticks_position('right')

clean_axis(axLabel)

axLabel.set_yticks([1.0,0.0])
axLabel.set_yticklabels(['max','0'], family = 'Liberation Sans', fontsize = 30, va = 'center')
axLabel.tick_params(axis='y', which='major', pad=10)

axLabel.yaxis.set_label_coords(2, 0.5)
axLabel.set_ylabel('TF expression', family = 'Liberation Sans', fontsize = 40, va = 'center', ha = 'center')


figname = 'v1.8_3_D_Legend_expression.pdf'
plt.savefig('%s/%s' % (path_figures, figname), 
            format = 'pdf', 
            transparent = True, 
            bbox_inches = 'tight', 
            pad_inches = 0, 
            rasterized = True)

Gene regulatory network inference

In [150]:
g_IFE_corr_dist = log2Transform(seq.ix[g_groups_IFE.index,PTO_coords_IFE.index]).T.corr()
Calculating binary logarithm of x + 1
In [151]:
g_IFE_CLR = GRN_CLR(g_IFE_corr_dist)
In [152]:
g_IFE_shNN = GRN_shared_NN(g_IFE_CLR, 25, 5)
Find 25 nearest neighbors for each gene

Find shared nearest neighbors

Drop all edges between genes with less than 5 shared nearest neighbors
In [154]:
G, G_pos = GRN_create_nx(g_IFE_shNN, drop_alone=True)
In [181]:
#initialize figure

height = 19
width = 19

fig = plt.figure(facecolor = 'w', figsize = (width, height))

#define axis limits

x_pos = [val[0] for val in G_pos.values()]
y_pos = [val[1] for val in G_pos.values()]

offset = 50

xlim = (np.min(x_pos) - offset, np.max(x_pos) + offset)
ylim = (np.min(y_pos) - offset, np.max(y_pos) + offset)

############################################################################

ax0 = plt.subplot()

#set axlims

ax0.set_xlim(xlim[0], xlim[1])
ax0.set_ylim(ylim[0], ylim[1])

#draw nodes


nx.draw_networkx_nodes(G, 
                       G_pos,
                       ax = ax0,
                       node_size = 300,
                       node_color = [cmap_g_IFE[g_groups_IFE[g]] for g in G.nodes()],
                       linewidths = 0.0)

nx.draw_networkx_edges(G,
                       G_pos,
                       ax=ax0, 
                       width = 0.025,
                       edge_color='black')

clean_axis(ax0)

figname = 'v1.8_S4_C_Gene_networks.pdf'
plt.savefig('%s/%s' % (path_figures, figname), 
            format = 'pdf', 
            transparent = True, 
            bbox_inches = 'tight', 
            pad_inches = 0, 
            rasterized = True)
In [223]:
def GNR_mark_gene(G, G_pos, gene):
    
    #initialize figure

    height = 19
    width = 19

    fig = plt.figure(facecolor = 'w', figsize = (width, height))

    #define axis limits

    x_pos = [val[0] for val in G_pos.values()]
    y_pos = [val[1] for val in G_pos.values()]

    offset = 50

    xlim = (np.min(x_pos) - offset, np.max(x_pos) + offset)
    ylim = (np.min(y_pos) - offset, np.max(y_pos) + offset)

    ############################################################################

    ax0 = plt.subplot()

    #set axlims

    ax0.set_xlim(xlim[0], xlim[1])
    ax0.set_ylim(ylim[0], ylim[1])
    
    #define nodecolor
    
    nodecolor = []
    
    for g in G.nodes():
        
        if g == gene:
            nodecolor.append('red')
        
        else:
            nodecolor.append('silver')

    #draw nodes


    nx.draw_networkx_nodes(G, 
                           G_pos,
                           ax = ax0,
                           node_size = 300,
                           node_color = nodecolor,
                           linewidths = 0.0)

    nx.draw_networkx_edges(G,
                           G_pos,
                           ax=ax0, 
                           width = 0.025,
                           edge_color='black')

    clean_axis(ax0)
In [227]:
GNR_mark_gene(G, G_pos, 'Lor')

Diff. gene groups - averaged splines

In [158]:
dataset = IFE_splines_norm
g_groups = g_groups_IFE

for pos, gr in enumerate(return_unique(g_groups)):
    
    figname = 'v1.8_S4_D1-%s_averaged_splines.pdf' % pos

    genes_sel = g_groups[g_groups==gr].index

    x_min = - max(dataset.columns.astype(float)) * 0.1
    x_max = max(dataset.columns.astype(float)) * 1.1

    y_min = -0.1
    y_max = 1.1

    fig = plt.figure(facecolor = 'w', figsize = (8,8))

    gs = plt.GridSpec(3, 1, hspace=0.1, wspace=0.00, height_ratios=[6.5,0.5,1.0])

    ax = plt.subplot(gs[0])

    ax.set_xlim(x_min, x_max)
    ax.set_ylim(y_min, y_max)

    #plot mean

    ax.plot([float(val) for val in dataset.columns], 
            [val for val in dataset.ix[genes_sel].median(axis = 0)], 
            linewidth = 5, color = 'red')

    ax.scatter([float(dataset.columns[0]), float(dataset.columns[-1])], 
               [dataset.ix[genes_sel].median(axis = 0).values[0], dataset.ix[genes_sel].median(axis = 0).values[-1]], 
               s = 200, c = 'red', linewidths = 0)

    #Plot percentile 1

    perc_1 = [np.percentile(dataset.ix[genes_sel, pos], 25) for pos in dataset.columns]

    ax.plot([float(val) for val in dataset.columns],
            perc_1, 
            linewidth = 2, color = 'dimgrey', zorder = 0)

    ax.scatter([float(dataset.columns[0]), float(dataset.columns[-1])], 
               [perc_1[0], perc_1[-1]], 
               s = 100, c = 'dimgrey', linewidths = 0, zorder = 0)

    #Plot percentile 2

    perc_2 = [np.percentile(dataset.ix[genes_sel, pos], 75) for pos in dataset.columns]

    ax.plot([float(val) for val in dataset.columns],
            perc_2, 
            linewidth = 2, color = 'dimgrey', zorder = 0)

    ax.scatter([float(dataset.columns[0]), float(dataset.columns[-1])], 
               [perc_2[0], perc_2[-1]], 
               s = 100, c = 'dimgrey', linewidths = 0, zorder = 0)

    clean_axis(ax)

    #plot pseudotime bar

    ax = plt.subplot(gs[1])

    ax.set_xlim(x_min, x_max)

    for pos in np.arange(0, max(dataset.columns.astype(float)), 1):
        ax.axvspan(pos, pos + 1, color = cmap_Gn_Yl_Rd(float(pos) / max(dataset.columns.astype(float))))

    ax.set_xlabel('Pseudotime', family = 'Liberation Sans', fontsize = 40, labelpad = 10)

    clean_axis(ax)

    #plot spaceholder axis

    ax = plt.subplot(gs[2])
    ax.set_xlim(x_min, x_max)
    clean_axis(ax)
    
    plt.savefig('%s/%s' % (path_figures, figname), 
            format = 'pdf', 
            transparent = True, 
            bbox_inches = 'tight', 
            pad_inches = 0, 
            rasterized = True)

Diff. gene groups - network

In [159]:
g_groups = g_groups_IFE

for pos, gr in enumerate(return_unique(g_groups)):
    
    figname = 'v1.8_S4_D2-%s_network.pdf' % pos

    #define gr indices

    genes_sel = list(set(g_groups[g_groups==gr].index))
                     
    #initialize figure

    height = 8
    width = 8

    fig = plt.figure(facecolor = 'w', figsize = (width, height))

    #define axis limits

    x_pos = [val[0] for val in G_pos.values()]
    y_pos = [val[1] for val in G_pos.values()]

    offset = 50

    xlim = (np.min(x_pos) - offset, np.max(x_pos) + offset)
    ylim = (np.min(y_pos) - offset, np.max(y_pos) + offset)

    ############################################################################

    ax0 = plt.subplot()

    #set axlims

    ax0.set_xlim(xlim[0], xlim[1])
    ax0.set_ylim(ylim[0], ylim[1])

    #draw nodes

    nx.draw_networkx_nodes(G, 
                           G_pos,
                           nodelist = [n for n in G.nodes() if n not in genes_sel],
                           ax = ax0,
                           node_size = 50,
                           node_color = 'silver',
                           linewidths = 0.0)

    nx.draw_networkx_nodes(G, 
                           G_pos,
                           nodelist = [n for n in G.nodes() if n in genes_sel],
                           ax = ax0,
                           node_size = 50,
                           node_color = 'red',
                           linewidths = 0.0)
    
    clean_axis(ax0)
    
    plt.savefig('%s/%s' % (path_figures, figname), 
            format = 'pdf', 
            transparent = True, 
            bbox_inches = 'tight', 
            pad_inches = 0, 
            rasterized = True)
In [161]:
for gr in return_unique(g_groups_IFE):
    
    TF_tmp = [ix for ix in g_groups_IFE[g_groups_IFE==gr].index if ix in TF_IFE]
    
    print gr, TF_tmp
1 ['Bhlhe40', 'Snai2', 'Tsc22d1', 'Klf9']
5 ['Sox6', 'Trp63', 'Zfp36l2']
0 ['Hes1', 'Gata3', 'Pou3f1', 'Tsc22d3', 'Ikzf2']
2 ['Phb', 'Aff4', 'Tet3', 'Zmat3', 'Irf6', 'Srebf2', 'Nfatc2', 'Creb3', 'Ppp1r13l']
3 ['Cbfb', 'Pou2f3', 'Tcf25', 'Kdm5b', 'Zfp750', 'Mta3', 'Grhl3', 'Cebpa', 'Zbtb7a', 'Nfe2l1', 'Tsc22d4', 'Zfp91', 'Casz1', 'Ovol1', 'Zc3h15']
7 ['Klf4', 'Mafk', 'Nfat5', 'Bach1', 'Rora', 'Jarid2', 'Mllt4', 'Klf3', 'Xbp1', 'Grhl1', 'Hivep1', 'Lrrfip2', 'Rel', 'Egr2', 'Hopx', 'Sp6', 'Maff', 'Csrnp2', 'Cebpb']
4 ['Mxi1', 'Tead1', 'Rorc', 'Zfp706', 'Max', 'Hbp1', 'Tsc22d2', 'Mxd1', 'Prdm1']
6 ['Id4']

Expression of diff.-related genes over 2nd level clusters

In [104]:
cell_groups = s_groups_2nd
gene_groups = g_groups_IFE
NBR_bin_bl = NBR_2nd_bin_bl

g_bin = pd.DataFrame(0, columns = return_unique(cell_groups), index = set(gene_groups))

for gr in return_unique(cell_groups): 
        
    for g_gr in set(gene_groups):
        
        g_ix = gene_groups[gene_groups==g_gr].index
        
        g_bin.ix[g_gr, gr] = NBR_bin_bl.ix[g_ix, gr].sum()
In [137]:
data = g_bin
cell_groups = s_groups_2nd
gene_groups = g_groups_IFE
exclude = [7,8,9,14,20,23,24]

#initialize figure

fig = plt.figure(facecolor = 'w', figsize = (25,15))

height_ratios = [8] + [3] * len(set(gene_groups))

gs = plt.GridSpec(len(set(gene_groups)) + 1, 2, hspace=0.0, wspace=0.05, width_ratios=[19,1], 
                  height_ratios = height_ratios)

#plot cell group names

ax = plt.subplot(gs[0, 0])
ax.set_xlim(-0.5, len(set(cell_groups))-0.5)
ax.set_ylim(0,1)

for pos, gr in enumerate(return_unique(cell_groups)):
    
    if pos % 2 == 0:
        ax.axvspan(pos-0.5,pos+0.5, 0.00, 1.0, color = '#E6E7E8', zorder = 0)
        
    ax.scatter(pos, 0.15, color = cmap_2nd[gr], s = markers_2nd_size[gr], marker = markers_2nd[gr])
    ax.text(pos + 0.1, 0.3, nmap_2nd_short[gr], family = 'Liberation Sans', fontsize = 40, 
            rotation = 'vertical', va = 'bottom', ha = 'center')
        
clean_axis(ax)

#iterate over gene groups

for pos, g_gr in enumerate(return_unique(gene_groups)):
    
    #plot barplots
        
    ax0 = plt.subplot(gs[pos + 1, 0])
    
    ax0.set_xlim(-0.5, len(set(cell_groups)) - 0.5)
    ax0.set_ylim(0, len(gene_groups[gene_groups==g_gr].index))
    
    ax0.set_xticks([])
    
    ax0.set_yticks([len(gene_groups[gene_groups==g_gr].index) * 0.5, len(gene_groups[gene_groups==g_gr].index)])
    ax0.set_yticklabels([50,100], family = 'Liberation Sans', fontsize = 35)
        
    for pos_gr, gr in enumerate(return_unique(cell_groups)):
        ax0.bar(pos_gr-0.4, g_bin.ix[g_gr, gr], color = cmap_2nd[gr], linewidth = 0)
        
    if pos == 3:
        
        ax0.yaxis.set_label_coords(-0.075, 0.5)
        ax0.set_ylabel('Expression of pseudotime-dependent\ngenes over Baseline [%]', 
                       family = 'Liberation Sans', fontsize = 40)
        
    #plot exclusion markers
    
    for pos_excl, gr in enumerate(return_unique(cell_groups)):
        
        if gr in exclude:
            
            ax0.axvspan(pos_excl - 0.5, pos_excl + 0.5, color = 'silver', alpha = 0.2, linewidth = 0)
    
    #plot color bar
    
    ax1 = plt.subplot(gs[pos + 1, 1])
    
    ax1.axhspan(0,1,0,1, color = cmap_g_IFE[g_gr])
    
    ax1.yaxis.set_label_position('right')
    ax1.yaxis.set_label_coords(1.25, 0.5)
    ax1.set_ylabel(nmap_g_IFE[g_gr], family = 'Liberation Sans', fontsize = 40, 
                   rotation = 'horizontal', ha = 'left', va = 'center')
    
    clean_axis(ax1)
   
figname = 'v1.8_3_E_Diff_genes_2nd.pdf'
plt.savefig('%s/%s' % (path_figures, figname), 
            format = 'pdf', 
            transparent = True, 
            bbox_inches = 'tight', 
            pad_inches = 0, 
            rasterized = True)

Pseudotime correlation

In [149]:
data = IFE_corr_max
cell_groups = s_groups_2nd
cmap = cmap_2nd
exclude = [7,8,9,14,20,23,24]

#initialize figure

height = 10
width = 25

fig = plt.figure(facecolor = 'w', figsize = (width, height))

gs = plt.GridSpec(1, 2, hspace = 0.05, wspace=0.05, width_ratios=[19,1])

#define axis

ax0 = fig.add_subplot(gs[0,0])

ax0.set_ylim(np.max(data),0)
ax0.set_yticks([])

ax0.set_xlim(-0.5, len(return_unique(cell_groups)) - 0.5)
ax0.set_xticks([])

#plot data

for pos, gr in enumerate(return_unique(cell_groups)):
    
    cell_ix_tmp = cell_groups[cell_groups==gr].index
    
    if gr in exclude:
        
        ax0.scatter([pos - 0.5 + np.random.random() for x in range(len(cell_ix_tmp))],
                [float(data[ix]) for ix in cell_ix_tmp],
                color = '#E6E7E8',
                s = 75)
    else:
    
        ax0.scatter([pos - 0.5 + np.random.random() for x in range(len(cell_ix_tmp))],
                    [float(data[ix]) for ix in cell_ix_tmp],
                    color = cmap[gr],
                    s = 75)

#define colorbar

ax1 = fig.add_subplot(gs[0,1])

ax1.set_xlim(0,1)
ax1.set_ylim(np.max(data),0)

ax1.set_xticks([])
ax1.set_yticks([])

for pos in np.arange(0, np.max(data)):
    
    ax1.axhspan(pos, pos + 1, color = cmap_Gn_Yl_Rd(float(pos) / max(data.astype(float))))
    
ax1.yaxis.set_label_coords(1.2, 0.5)
ax1.set_ylabel('Differentiation axis', family = 'Liberation Sans', fontsize = 40, va = 'top')

clean_axis(ax1)

figname = 'v1.8_3_F_Diff_correlation.pdf'
plt.savefig('%s/%s' % (path_figures, figname), 
            format = 'pdf', 
            transparent = True, 
            bbox_inches = 'tight', 
            pad_inches = 0, 
            rasterized = True)

Pseudotime correlation p-value

In [163]:
IFE_corr, IFE_corr_p = PTO_correlate(log2Transform(seq), log2Transform(IFE_fitted), s_groups_2nd.index, g_groups_IFE.index, return_p=True)
Calculating binary logarithm of x + 1

Calculating binary logarithm of x + 1
In [167]:
IFE_corr_p_min = IFE_corr_p.min(axis = 1)
In [184]:
data = -np.log10(IFE_corr_p_min + 3e-324) #to prevent 0.000
cell_groups = s_groups_2nd
cmap = cmap_2nd

#initialize figure

height = 15
width = 25

fig = plt.figure(facecolor = 'w', figsize = (width, height))

gs = plt.GridSpec(2, 2, hspace = 0.0, height_ratios = [4,11], wspace=0.05, width_ratios=[19,1])

#plot cell group names

ax = plt.subplot(gs[0,0])
ax.set_xlim(-0.5, len(set(cell_groups))-0.5)
ax.set_ylim(0,1)

for pos, gr in enumerate(return_unique(cell_groups)):
    
    if pos % 2 == 0:
        ax.axvspan(pos-0.5,pos+0.5, 0.005, 1.0, color = '#F4F4F4', zorder = 0)
        
    ax.scatter(pos, 0.15, color = cmap_2nd[gr], s = markers_2nd_size[gr], marker = markers_2nd[gr])
    ax.text(pos + 0.1, 0.3, nmap_2nd_short[gr], family = 'Liberation Sans', fontsize = 40, 
            rotation = 'vertical', va = 'bottom', ha = 'center')
        
clean_axis(ax)

#define axis

ax0 = fig.add_subplot(gs[1,0])

ax0.set_ylim(np.max(data),0)
ax0.set_yticks([])

ax0.set_xlim(-0.5, len(return_unique(cell_groups)) - 0.5)
ax0.set_xticks([])

ax0.set_ylim(0, 350)
ax0.set_yticks([0,50,100,150,200,250,300,350])
ax0.set_yticklabels([0,50,100,150,200,250,300,350], family = 'Liberation Sans', fontsize = 35)
ax0.set_ylabel('Pearson correlation\nP-value [$-log_{10}$]', family = 'Liberation Sans', fontsize = 40, rotation = 'vertical', ha = 'center', va = 'center')
ax0.yaxis.set_label_coords(-0.1, 0.5)


#plot data

for pos, gr in enumerate(return_unique(cell_groups)):
    
    cell_ix_tmp = cell_groups[cell_groups==gr].index
    
    ax0.scatter([pos - 0.5 + np.random.random() for x in range(len(cell_ix_tmp))],
                [float(data[ix]) for ix in cell_ix_tmp],
                color = cmap[gr],
                s = 50)
    
    ax0.hlines(y =  data.ix[cell_ix_tmp].median(),
                    xmin = pos - 0.5, xmax = pos + 0.5, linewidth = 3, color = 'black')
    
    if pos % 2 == 0:
        c = '#d1d2d4'
        
    if pos % 2 == 1:
        c = '#939597'
    
figname = 'v1.8_S4_E_Diff_correlation_p-value.pdf'
plt.savefig('%s/%s' % (path_figures, figname), 
            format = 'pdf', 
            transparent = True, 
            bbox_inches = 'tight', 
            pad_inches = 0, 
            rasterized = True)

Pseudotime correlation robustness

In [60]:
IFE_corr_robustness_distance = loadData_v1(path_output, exp_id, 'IFE_corr_robustness_distance', 'DataFrame')
In [103]:
data = IFE_corr_robustness_distance.mean()
cell_groups = s_groups_2nd
cmap = cmap_2nd

#initialize figure

height = 8
width = 25

fig = plt.figure(facecolor = 'w', figsize = (width, height))

gs = plt.GridSpec(1, 2, hspace = 0.0, wspace=0.05, width_ratios=[19,1])
"""
#plot cell group names

ax = plt.subplot(gs[0,0])
ax.set_xlim(-0.5, len(set(cell_groups))-0.5)
ax.set_ylim(0,1)

for pos, gr in enumerate(return_unique(cell_groups)):
    
    if pos % 2 == 0:
        ax.axvspan(pos-0.5,pos+0.5, 0.005, 1.0, color = '#F4F4F4', zorder = 0)
        
    ax.scatter(pos, 0.15, color = cmap_2nd[gr], s = markers_2nd_size[gr], marker = markers_2nd[gr])
    ax.text(pos + 0.1, 0.3, nmap_2nd_short[gr], family = 'Liberation Sans', fontsize = 40, 
            rotation = 'vertical', va = 'bottom', ha = 'center')
        
clean_axis(ax)
"""
#define axis

ax0 = fig.add_subplot(gs[0,0])

ax0.set_ylim(np.max(data),0)
ax0.set_yticks([])

ax0.set_xlim(-0.5, len(return_unique(cell_groups)) - 0.5)
ax0.set_xticks([])

ax0.set_ylim(200,0)
ax0.set_yticks([0,50,100,150,200,250])
ax0.set_yticklabels([0,50,100,150,200,250], family = 'Liberation Sans', fontsize = 35)
ax0.set_ylabel('Correlation robustness', family = 'Liberation Sans', fontsize = 40, rotation = 'vertical', ha = 'center', va = 'center')
ax0.yaxis.set_label_coords(-0.075, 0.5)


#plot data

for pos, gr in enumerate(return_unique(cell_groups)):
    
    cell_ix_tmp = cell_groups[cell_groups==gr].index
    
    ax0.scatter([pos - 0.5 + np.random.random() for x in range(len(cell_ix_tmp))],
                [float(data[ix]) for ix in cell_ix_tmp],
                color = cmap[gr],
                s = 50)
    
    ax0.hlines(y =  data.ix[cell_ix_tmp].median(),
                    xmin = pos - 0.5, xmax = pos + 0.5, linewidth = 3, color = 'black')
    
    if pos % 2 == 0:
        c = '#d1d2d4'
        
    if pos % 2 == 1:
        c = '#939597'

figname = 'v1.8_S4_F_Diff_correlation_robustness.pdf'
plt.savefig('%s/%s' % (path_figures, figname), 
            format = 'pdf', 
            transparent = True, 
            bbox_inches = 'tight', 
            pad_inches = 0, 
            rasterized = True)

Validation - uHF pseudotime

In [79]:
uHF_stats = loadData_v1(path_output, exp_id, 'uHF_stats', 'DataFrame')
In [81]:
bonferroni = 0.001 / len(IFE_stats.index)
In [99]:
#create data

x_data = -np.log10(IFE_stats.ix[IFE_stats.index, 'Pr(>Chisq)']).fillna(0)
y_data = -np.log10(uHF_stats.ix[IFE_stats.index, 'Pr(>Chisq)']).fillna(0)

#create color list

clist = []

for g in IFE_stats.index:
    if x_data[g] < -np.log10(bonferroni) and y_data[g] < -np.log10(bonferroni):
        clist.append('silver')
    elif x_data[g] > -np.log10(bonferroni) and y_data[g] < -np.log10(bonferroni):
        clist.append('#00FF00')
    elif x_data[g] < -np.log10(bonferroni) and y_data[g] > -np.log10(bonferroni):
        clist.append('#00FF00')
    elif x_data[g] > -np.log10(bonferroni) and y_data[g] > -np.log10(bonferroni):
        clist.append('green')

#initialize figure

height = 10
width = 10

fig = plt.figure(facecolor = 'w', figsize = (width, height))

ax = plt.subplot()

#define x-axis

ax.set_xlim(x_data.min(), x_data.max())

ax.set_xlabel('IFE - Pseudotime dependency\n- P-value [$-\log_{10}$]', family = 'Liberation Sans', fontsize = 40)
ax.xaxis.set_label_coords(x = 0.5, y = -0.075)
for tick in ax.xaxis.get_major_ticks():
                tick.label.set_fontsize(35) 
                tick.label.set_family('Liberation Sans')   

#define y-axis

ax.set_ylim(y_data.min(), y_data.max())

ax.set_ylabel('uHF - Pseudotime dependency\n- P-value [$-\log_{10}$]', family = 'Liberation Sans', fontsize = 40)
ax.yaxis.set_label_coords(y = 0.5, x = -0.075)
for tick in ax.yaxis.get_major_ticks():
                tick.label.set_fontsize(35) 
                tick.label.set_family('Liberation Sans')   

#plot data

ax.scatter(x_data,
           y_data,
           c = clist, 
           linewidth = 0, 
           s = 50, 
           edgecolor = clist)

ax.axhline(y = -np.log10(bonferroni), c = 'k', linewidth = 2)
ax.axvline(x = -np.log10(bonferroni), c = 'k', linewidth = 2)

ax.text(ax.get_xlim()[1] * 0.5, 
        ax.get_ylim()[1] * 0.95,
        'R = %.2f' % scipy.stats.pearsonr(x_data.values, y_data.values)[0],
        family = 'Liberation Sans', 
        fontsize = 40, 
        color = 'red', 
        ha = 'center', va = 'center')

figname = 'v1.8_S4_G_Validation_uHF_PTO_Gene_Corr.pdf'
plt.savefig('%s/%s' % (path_figures, figname), 
            format = 'pdf', 
            transparent = True, 
            bbox_inches = 'tight', 
            pad_inches = 0, 
            rasterized = True)

Correlation based on IFE and uHF data

In [92]:
PTO_coords_uHF = loadData_v1(path_output, exp_id, 'PTO_coords_uHF', 'Series')
uHF_corr_max = loadData_v1(path_output, exp_id, 'uHF_corr_max', 'Series')
In [100]:
cell_groups = s_groups_2nd
cmap = cmap_2nd

#cells = list(set(PTO_coords_uHF.index) | set(PTO_coords_IFE.index))
cells = cell_groups[~cell_groups.isin([7,8,9,14,20,23,24])].index

x_data = IFE_corr_max[cells]
y_data = uHF_corr_max[cells]

#initialize figure

height = 10
width = 10
    
fig = plt.figure(facecolor = 'w', figsize = (width, height))

#initialize GridSpec

gs1 = plt.GridSpec(2, 2, hspace=0.1, wspace = 0.05, height_ratios=[14, 1], width_ratios = [1, 14])

#IFE correlation legend

ax = plt.subplot(gs1[1,1])
ax.set_xlim(0, np.max(x_data))
for pos in np.arange(0, np.max(x_data)):
    ax.axvspan(pos, pos + 1, color = cmap_Gn_Yl_Rd(pos / np.max(x_data)))
ax.set_xlabel('Pseudotime - IFE', family = 'Liberation Sans', fontsize = 40, labelpad = 15)

clean_axis(ax)

#uHF correlation legend

ax = plt.subplot(gs1[0,0])
ax.set_ylim(0, np.max(y_data))
for pos in np.arange(0, np.max(y_data)):
    ax.axhspan(pos, pos + 1, color = cmap_Gn_Yl_Rd(pos / np.max(y_data)))
ax.set_ylabel('Pseudotime - uHF', family = 'Liberation Sans', fontsize = 40, labelpad = 15)

clean_axis(ax)

#main plot

ax = plt.subplot(gs1[0,1])

ax.set_xlim(0, np.max(x_data))
ax.set_ylim(0, np.max(y_data))

clist = 'dodgerblue'#[cmap_2nd[cell_groups[ix]] for ix in cells]

remove_ticks(ax)

ax.scatter(x_data.values, 
           y_data.values, 
           s = 50, 
           color = clist, 
           edgecolor = clist)

ax.text(ax.get_xlim()[1] * 0.5, 
        ax.get_ylim()[1] * 0.95,
        'R = %.2f' % scipy.stats.pearsonr(x_data.values, y_data.values)[0],
        family = 'Liberation Sans', 
        fontsize = 40, 
        color = 'red', 
        ha = 'center', va = 'center')

figname = 'v1.7_S4_G_Validation_uHF_PTO_Cell_Corr.pdf'
plt.savefig('%s/%s' % (path_figures, figname), 
            format = 'pdf', 
            transparent = True, 
            bbox_inches = 'tight', 
            pad_inches = 0, 
            rasterized = True)
In [176]:
#initialize figure

height = 11
width = 1
    
fig = plt.figure(facecolor = 'w', figsize = (width, height))

#pseudotime legend

ax = plt.subplot(111)
ax.set_ylim(0, 1)
for pos in np.arange(0, 1, 0.01):
    ax.axhspan(pos, pos + 1, color = cmap_Gn_Yl_Rd(pos))
ax.set_ylabel('Differentiation axis - Pseudotime', family = 'Liberation Sans', fontsize = 40, labelpad = 15)

clean_axis(ax)

figname = 'v1.8_3_Differentiation_legend.pdf'
plt.savefig('%s/%s' % (path_figures, figname), 
            format = 'pdf', 
            transparent = True, 
            bbox_inches = 'tight', 
            pad_inches = 0, 
            rasterized = True)